import scipy
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings

def simulate_AR1(rho, sigma, T, shocks = None):
    """Simulate a time series for 
    an AR(1) process with
    
    x_{t + 1} = rho x_t + eps_{t+1}
    
    where
    
    eps_{t + 1} ~ N(0, sigma ^ 2).
    
    Initial condition is 
    
    x_0 ~ N(0, sigma ^ 2 / (1 - rho ^ 2))

    Persistence parameter must lie in (-1, 1)
    for an AR(1) to be simulated. If rho = 1,
    a random walk is simulated with the initial
    condition x_0 = 0.
    
    Args:
        rho (float): AR(1) persistence parameter
        sigma (float): Standard deviation of shocks
        T (int): Length of simulated time series
        shocks (array, optional): If provided,
            use the time series in shocks for the disturbances (eps)
    
    Returns:
        dict: Dictionary, contains:
            shocks (float): Simulated shocks (eps)
            x (float): Simulated time series
    """
    assert rho > - 1 and rho <= 1, \
        'Persistence parameter should be in (-1, 1] (random walk possible).'

    if shocks is None:
        shocks = np.random.randn(1, T).flatten() * sigma
        if rho == 1:
            shocks[0] = 0
        else:
            shocks[0] = np.random.randn(1, 1) * sigma / np.sqrt(1 - rho ** 2)
    return {'shocks': shocks,
            'x': scipy.signal.lfilter([1] ,[1, -rho], shocks)}


def get_kalman_gain(rho, sigma_eps, sigma_omega):
    """Calculate the Kalman gain for a state-space model
    in which target variable x_t follows an AR(1) 
    process with

    x_t = rho x_{t - 1} + eps_t, eps_t ~ N(0, sigma_eps ** 2)

    and the observation equation is 
    
    s_t = x_t + omega_t

    where omega_t ~ N(0, sigma_omega ** 2), and s_t
    is the observed signal.
    
    Args:
        rho (float): Persistence of x_t
        sigma_eps (float): Standard deviation of shocks to x_t
        sigma_omega (float): Standard deviation of shocks to s_t
    
    Returns:
        dict: Dictionary, contains:
            shocks (float): Simulated shocks (eps)
            x (float): Simulated time series
    """
    assert rho > - 1 and rho < 1, \
        'Persistence parameter should be in (-1, 1).'
    assert sigma_eps > 0 and sigma_omega > 0, \
        'Standard deviations should be positive.'
        
    Sigma = 0.5 * (-(1 - rho ** 2) * sigma_omega ** 2 
                   + sigma_eps ** 2 
                   + np.sqrt(((1 - rho ** 2) * sigma_omega ** 2 - sigma_eps ** 2) ** 2
                       + 4 * sigma_eps ** 2 * sigma_omega ** 2))
    
    return Sigma / (Sigma + sigma_omega ** 2)


def simulate_target_variable(rho, sigma_eps, sigma_omega, T):
    """Simulate data for the target variable
    x_t and signal s_t. The target variable is
    
    x_t = rho x_{t - 1} + eps_t, eps_t ~ N(0, sigma_eps ** 2)
    
    and the signal s_t is generated by
    
    s_t = x_t + omega_t,
    
    where omega_t ~ N(0, sigma_omega ** 2).
    
    Args:
        rho (double): Persistence of x_t
        sigma_eps (double): Standard deviation of shocks to x_t
        sigma_omega (double): Standard deviatoin of shocks to s_t
        T (int): Sample size
    
    Returns:
        df: Pandas dataframe
    """
    assert rho > 0 and rho < 1, \
        'Persistence should be in (0, 1).'
    assert sigma_eps > 0 and sigma_omega > 0, \
        'Standard deviations should be positive.'
    assert isinstance(T, int) and T > 0, \
        'T should be a positive integer.'

    df = pd.DataFrame()

    # Simulate data for x_t
    simulated_data = simulate_AR1(rho, sigma_eps, T)
    df['x'] = simulated_data['x']
    df['eps'] = simulated_data['shocks']

    # Simulate signal
    df['omega'] = np.random.randn(1, T).flatten() * sigma_omega
    df['s'] = df['x'] + df['omega']

    return df


def generate_forecasts(signal, rho_perceived, 
        sigma_eps_perceived, sigma_omega_perceived, theta):
    """Generate subjective expectations given a 
    realized vector of signals (in signal), perceived
    parameters of the process {rho_perceived,
    sigma_eps_perceived, sigma_omega_perceived},
    and diagnosticity parameter theta.
    
    Args:
        signal (np array): Vector of signals
        rho_perceived (double): Perceived persistence of x_t
        sigma_eps_perceived (double): Perceived standard deviation of shocks to x_t
        sigma_omega_perceived (double): Perceived standard deviation of shocks to s_t
        theta (double): Diagnosticity parameter
    
    Returns:
        dict: Dictionary, contains:
                forecast (np array): Forecasts
                forecast_non_diag (np array): Non-diagnostic component of forecasts 
    """
    assert type(signal) is np.ndarray, \
        'signal must be a numpy array'

    assert signal.ndim == 1, \
        'signal must be a vector.'

    assert theta >= 0, \
        'Diagnosticity parameter theta must be weakly positive.'

    # Calculate Kalman gain
    G = get_kalman_gain(rho = rho_perceived, 
                        sigma_eps = sigma_eps_perceived, 
                        sigma_omega = sigma_omega_perceived)

    # Recursively construct non-diagnostic
    # component of forecasts
    forecast_non_diag = []
    surprise = []
    
    for ii in range(len(signal)):
        if ii == 0:
            forecast_non_diag.append(rho_perceived * G * signal[ii])
            surprise.append(0.0)
        else:
            forecast_non_diag.append(rho_perceived * G * signal[ii] 
                                   + rho_perceived * (1 - G) * forecast_non_diag[ii - 1])
            surprise.append(signal[ii] - forecast_non_diag[ii - 1])
    
    # Calculate full forecasts
    forecast_non_diag = np.array(forecast_non_diag)
    surprise = np.array(surprise)
    forecast = forecast_non_diag + theta * rho_perceived * G * surprise

    return {'forecast': forecast,
            'forecast_non_diag': forecast_non_diag}


def simulate_dataset(params_DGP, params_expectations):
    """Simulate a dataset containing realizations
    of the target variable, signal, shocks, and expectaitons
    
    Args:
        params_DGP (dict): Parameters of the data-generating process
        params_expectations (dict): Parameters of expectation formation
    
    Returns:
        df: Dataframe, contains:
            x: Target variable
            s: Signa
            eps: Shocks to target variable
            omega: Shocks to signal
            forecast: Forecast (aligned with the date for which the forecast is made)
            forecast_non_diag: Non-diagnostic component of expectations
            forecast_error: Forecast error  
    """
    df = simulate_target_variable(rho = params_DGP['rho'], 
                                  sigma_eps = params_DGP['sigma_eps'], 
                                  sigma_omega = params_DGP['sigma_omega'], 
                                  T = params_DGP['T'])
    forecasts = generate_forecasts(signal = df['s'].values, 
                                   rho_perceived = params_expectations['rho_perceived'], 
                                   sigma_eps_perceived = params_expectations['sigma_eps_perceived'], 
                                   sigma_omega_perceived = params_expectations['sigma_omega_perceived'], 
                                   theta = params_expectations['theta'])
    df['forecast'] = forecasts['forecast']
    df['forecast_non_diag'] = forecasts['forecast_non_diag']

    # Align forecasts with the date for which they are made
    for var in ['forecast', 'forecast_non_diag']:
        df[var] = df[var].shift(1)

    # Calculate forecast errors
    df['forecast_error'] = df['x'] - df['forecast']

    return df


def calculate_shock_specific_bias_coefs(params_DGP, params_expectations, L):
    """Calculate shock-specific bias coefficients
    using theoretical closed-form formulas
    
    Args:
        params_DGP (dict): Parameters of the data-generating process
        params_expectations (dict): Parameters of expectation formation
        L (int): Number of bias coefficients to calculate
    
    Returns:
        dict: Dictionary, contains:
            aggregate (np array): Bias coefficients w.r.t. aggregate shock
            idiosyncratic (np array): Bias coefficients w.r.t. idiosyncratic shock
    """
    # Unpack parameters
    
    # DGP parameters   
    rho = params_DGP['rho']

    # Expectations parameters
    rho_perceived = params_expectations['rho_perceived']
    sigma_eps_perceived = params_expectations['sigma_eps_perceived']
    sigma_omega_perceived = params_expectations['sigma_omega_perceived']
    theta = params_expectations['theta']

    # Calculate Kalman gain
    G = get_kalman_gain(rho_perceived, sigma_eps_perceived, sigma_omega_perceived)

    # Bias coefficients w.r.t. aggregate shock
    L = np.array(range(1, L + 1))
    b_agg = (rho_perceived * G * (rho_perceived * (1 - G)) ** L + rho ** L * (rho - rho_perceived)
        + theta * rho_perceived * G * (rho ** (L - 1) * (rho_perceived - rho)
                                       - rho_perceived ** L * G * (1 - G) ** (L - 1)))
    b_agg = b_agg / (rho_perceived * (1 - G) - rho)

    # Bias coefficients w.r.t. idiosyncratic shock
    b_id = (rho_perceived ** 2 * G * (rho_perceived * (1 - G)) ** (L[1:] - 2) 
        * (1 - (1 + theta) * G)) # For l >= 2
    b_id = np.insert(b_id, 0, rho_perceived * G * (1 + theta)) # For l = 1

    return {'aggregate': b_agg,
            'idiosyncratic': b_id}


def get_autocovar_MA(theta, sigma):
    """Calculate the autocovariance
    function of an MA(Q) process:

    x_t = e_t + theta_1 e_{t-1} + ... + theta_Q e_{t-Q},

    where

    e_t ~ iid N(0, sigma ** 2).

    Here, theta = (1, theta_1, ..., theta_Q)
    is a vector of length (Q + 1) containing
    all the constants multiplying the innovations.  
    
    Args:
        theta (np array): MA coefficients (with theta[0] = 1)
        sigma (float): Standard deviation of innovations
    
    Returns:
        np array: Vector of length (Q + 1) containing the
            autocovariances (g_0, g_1, ..., g_Q)
            with g_k = Cov(x_t, x_{t-k}) for k = 0, 1, ..., Q.
    """
    assert type(theta) is np.ndarray, \
        'theta must be a numpy array'

    assert theta.ndim == 1, \
        'theta must be a vector.'

    assert theta[0] == 1, \
        'The first element of theta must be 1.'

    assert len(theta) > 1, \
        'Theta must have length greater than 1.'

    assert sigma > 0, \
        'Standard deviation of innovations should be positive.'

    # Calculate autocovariance function
    Q1 = len(theta) # (Q + 1)
    res = []
    for jj in range(0, Q1):
        res.append(sigma ** 2 * np.dot(theta[0:(Q1 - jj)], theta[jj:]))
    return np.array(res)


def get_MA_coefs_sum(theta_1, sigma_1, theta_2, sigma_2):
    """Get the MA(2) representation of

    x_t = y_t + z_t

    where y_t and z_t are independent
    MA(2) processes with parameters
    (theta_1, sigma_1) and (theta_2, sigma_2).
    
    Args:
        theta_1 (np array): MA coefficients of y_t
        sigma_1 (float): Standard deviation of innovations to y_t
        theta_2 (np array): MA coefficients of z_t
        sigma_2 (float): Standard deviation of innovations to z_t
    
    Returns:
        dict: Dictionary, contains:
            theta (np array): MA coefficients of x_t
            sigma (float): Standard deviation of innovations to x_t 
    """
    assert len(theta_1) == len(theta_2), \
        'MA processes must have the same length.'
    
    # Calculate autocovariances of the original process
    # and deduce the autocovariance of the sum process
    autocov_1 = get_autocovar_MA(theta_1, sigma_1)
    autocov_2 = get_autocovar_MA(theta_2, sigma_2)
    autocov = autocov_1 + autocov_2

    root_fun = lambda x: (get_autocovar_MA(np.concatenate(([1], x[1:])), x[0]) 
                            - autocov)
    
    # Get reasonable starting values
    w = autocov_1[0] / (autocov_1[0] + autocov_2[0])
    init_vals = w * theta_1[1:] + (1 - w) * theta_2[1:]
    init_vals = np.concatenate(([w * sigma_1 + (1 - w) * sigma_2], init_vals))

    # Find MA representation by solving a 
    # polynomical equation numerically
    MA_rep_sol = scipy.optimize.root(root_fun, init_vals)
    
    # Check if optimization coverged
    if not MA_rep_sol['success']:
        raise ValueError('Optimization unsuccessful.')
    else:
        theta = MA_rep_sol['x'][1:]
        theta = np.insert(theta, 0, 1.0)

        # Check if the MA representation is invertible.
        # If not, calculate the invertible representation
        # manually using tools in statsmodels
        arma_process = sm.tsa.ArmaProcess(ar = np.array([1]),
                                          ma = theta)
        if not arma_process.isinvertible:
            warnings.warn('Non-invertible representation of MA process found. Converting to an invertible representation.')
            theta = arma_process.invertroots()[0]

            # If some MA coefficients are zero after
            # caculating the invertible representation,
            # pad with zeros to retain the same dimensions
            if len(theta) < len(theta_1):
                theta = np.pad(theta, (0, len(theta_1) - len(theta)), 'constant')
        
        return({'theta': theta,
                'sigma': MA_rep_sol['x'][0]})


def get_IRF_ARMA22(phi_1, phi_2, theta_1, theta_2, L):
    """Calculate the impulse response function for
    an ARMA(2, 2) process:

    x_t - phi_1 x_{t-1} - phi_2 x_{t-2} = e_t + theta_1 e_{t - 1} + theta_2 e_{t - 2}

    where e_t are mean zero IID shocks
    
    Args:
        phi_1 (double): First AR coefficient
        phi_2 (double): Second AR coefficient
        theta_1 (double): First MA coefficient
        theta_2 (double): Second MA coefficient
        L (int): Number of periods for the IRF
    
    Returns:
        np array: Impulse response function
    """
    assert (isinstance(phi_1, float) 
        and isinstance(phi_2, float)
        and isinstance(theta_1, float)
        and isinstance(theta_2, float)), \
        'Parameters should be floats.'

    assert isinstance(L, int) and L > 0, \
        'L should be a positive integer.'

    # Calculate the IRF recursively
    IRF = []
    for ll in range(L):
        if ll == 0:
            IRF.append(1.0)
        elif ll == 1:
            IRF.append(phi_1 * IRF[ll - 1] + theta_1)
        elif ll == 2:
            IRF.append(phi_1 * IRF[ll - 1] + phi_2 * IRF[ll - 2] + theta_2)
        elif ll >= 3:
            IRF.append(phi_1 * IRF[ll - 1] + phi_2 * IRF[ll - 2])
    return np.array(IRF)


def calculate_composite_bias_coefs(params_DGP, params_expectations, L, return_sigma = False):
    """Calculate composite bias coefficients.
    
    Args:
        params_DGP (dict): Parameters of the data-generating process
        params_expectations (dict): Parameters of expectation formation
        L (int): Number of bias coefficients to calculate
        return_sigma (bool, optional): If true, return the standard deviation
            of the implied univariate innovations
    
    Returns:
        np array: Composite bias coefficients
    """
    # Unpack parameters
    
    # DGP parameters   
    rho = params_DGP['rho']
    sigma_eps = params_DGP['sigma_eps']
    sigma_omega = params_DGP['sigma_omega']

    # Expectations parameters
    rho_perceived = params_expectations['rho_perceived']
    sigma_eps_perceived = params_expectations['sigma_eps_perceived']
    sigma_omega_perceived = params_expectations['sigma_omega_perceived']
    theta = params_expectations['theta']
    
    # Calculate Kalman gain
    G = get_kalman_gain(rho_perceived, sigma_eps_perceived, sigma_omega_perceived)

    # Calculate MA(2) representation
    theta_1 = np.array([1, 
                        - rho_perceived * (1 + theta * G),
                        + rho_perceived ** 2 * theta * G])
    sigma_1 = sigma_eps

    theta_2 = np.array([1,
                        -(rho + theta * (rho_perceived + rho)) / (1 + theta),
                        (theta * rho * rho_perceived) / (1 + theta)])
    sigma_2 = (rho_perceived * G * (1 + theta)) * sigma_omega
    MA_rep = get_MA_coefs_sum(theta_1, sigma_1, theta_2, sigma_2)

    # Calculate the univariate IRF
    IRF = get_IRF_ARMA22(phi_1 = rho_perceived * (1 - G) + rho, 
                         phi_2 = - rho_perceived * rho * (1 - G), 
                         theta_1 = MA_rep['theta'][1], 
                         theta_2 = MA_rep['theta'][2], 
                         L = L + 1)

    # Convert to bias coefficients
    bias_coefs = -IRF[1:]
    
    if not return_sigma:
        return bias_coefs
    
    else: return{'bias_coefficients': bias_coefs,
                 'sigma': MA_rep['sigma'],
                 'MA_rep': MA_rep}


def calculate_bias_coefs(params_DGP, params_expectations, L):
    """Wrapper  function to calculate composite and shock-specific
    bias coefficients for a noisy-information model
    with an AR(1) fundamental, misperceived
    parameters, and diagnostic expectations.
    
    Args:
        params_DGP (dict): Parameters of the data-generating process
        params_expectations (dict): Parameters of expectation formation
        L (int): Number of bias coefficients to calculate
    
    Returns:
        df: Dataframe, contains:
            L: Number of bias coefficient
            composite: Composite bias coefficients
            aggregate: Aggregate bias coefficients
            idiosyncratic: Idiosyncratic bias coefficients
    """
    composite = calculate_composite_bias_coefs(params_DGP, params_expectations, L)
    shock_specific = calculate_shock_specific_bias_coefs(params_DGP, params_expectations, L)
    df = pd.DataFrame()
    df['L'] = np.array(range(1, L + 1))
    df['composite'] = composite
    df['aggregate'] = shock_specific['aggregate']
    df['idiosyncratic'] = shock_specific['idiosyncratic']
    return df


def calculate_composite_bias_coefs_noisy(params_DGP, params_expectations, L, sigma_m):
    """Calculate composite bias coefficients when
    measurement error is present.
    
    Args:
        params_DGP (dict): Parameters of the data-generating process
        params_expectations (dict): Parameters of expectation formation
        L (int): Number of bias coefficients to calculate
        sigma_m (double): Standard deviation of noise
    
    Returns:
        np array: Composite bias coefficients
    """
    # Unpack parameters
    
    # DGP parameters   
    rho = params_DGP['rho']
    sigma_eps = params_DGP['sigma_eps']
    sigma_omega = params_DGP['sigma_omega']

    # Expectations parameters
    rho_perceived = params_expectations['rho_perceived']
    sigma_eps_perceived = params_expectations['sigma_eps_perceived']
    sigma_omega_perceived = params_expectations['sigma_omega_perceived']
    theta = params_expectations['theta']
    
    # Calculate Kalman gain
    G = get_kalman_gain(rho_perceived, sigma_eps_perceived, sigma_omega_perceived)

    # Calculate composite bias coefficients and the MA(2)
    # component without measurement error
    bias_coefs_no_error = calculate_composite_bias_coefs(params_DGP, 
        params_expectations, L, return_sigma = True)

    # Calculate MA(2) component with measurement error

    # MA(2) from normal expectations
    theta_1 = bias_coefs_no_error['MA_rep']['theta']
    sigma_1 = bias_coefs_no_error['MA_rep']['sigma']

    # MA(2) from measurement error
    theta_2 = np.array([1,
                        -(rho_perceived * (1 - G) + rho),
                        rho_perceived * rho * (1 - G)])
    sigma_2 = sigma_m

    # Calculate the MA(2) representation for the sum
    MA_rep = get_MA_coefs_sum(theta_1, sigma_1, theta_2, sigma_2)

    # Calculate the univariate IRF
    IRF = get_IRF_ARMA22(phi_1 = rho_perceived * (1 - G) + rho, 
                         phi_2 = - rho_perceived * rho * (1 - G), 
                         theta_1 = MA_rep['theta'][1], 
                         theta_2 = MA_rep['theta'][2], 
                         L = L + 1)

    # Convert to bias coefficients
    bias_coefs = -IRF[1:]

    return bias_coefs


def calculate_SSR(df_empirical, params_DGP, rho_perceived, sigma_omega_perceived, theta):
    """Calculate sum of squared residuals between
    empirical estimates of bias coefficients and
    theoretical predictions
    
    Args:
        df_empirical (df): Empirical estiamtes
        params_DGP (dict): Parameters of the DGP
        rho_perceived (float): Perceived persistence
        sigma_omega_perceived (float): Perceived std. dev. of idiosyncratic shocks
        theta (float): Diagnosticity parameter
    
    Returns:
        dict: Sum of squared residuals
    """
    
    # Collect expectations parameters
    params_expectations = {'rho_perceived': rho_perceived,
                           'sigma_eps_perceived': 1.0, # Normalization
                           'sigma_omega_perceived': sigma_omega_perceived,
                           'theta': theta}
    
    # Calculate theoretical bias coefficients
    max_L = df_empirical.shape[0]
    bias_coefs_theory = calculate_shock_specific_bias_coefs(params_DGP, params_expectations, L = max_L + 1)
    
    # Normalize idiosyncratic bias coefficients
    # and use the same timing convention as with 
    # empirical estimates
    bias_coefs_theory['idiosyncratic'] = bias_coefs_theory['idiosyncratic'] / bias_coefs_theory['idiosyncratic'][0]
    bias_coefs_theory['idiosyncratic'] = bias_coefs_theory['idiosyncratic'][1:]
    bias_coefs_theory['aggregate'] = bias_coefs_theory['aggregate'][0:(-1)]
    
    # Calculate SSRs
    SSR_agg = ((bias_coefs_theory['aggregate'] - df_empirical['aggregate_empirical']) ** 2).sum()
    SSR_idio = ((bias_coefs_theory['idiosyncratic'] - df_empirical['idiosyncratic_empirical']) ** 2).sum()
    SSR = SSR_agg + SSR_idio 
    
    return {'SSR': SSR,
            'SSR_agg': SSR_agg,
            'SSR_idio': SSR_idio}


def fit_model(df_empirical, params_DGP, model, objective = 'SSR', eps = 1e-6):
    """Fit empirical bias coefficients to recover
    structural parameters of models of expectation
    formation
    
    Args:
        df_empirical (df): Empirical estiamtes
        params_DGP (dict): Parameters of the DGP
        model (str): Model name
        objective (str, optional): Objective function
        eps (float, optional): Numerical tolerance
    
    Returns:
        dict: Model fit results
    """
    # Specificy objective functions
    # and constraints for the different models
    if model == 'baseline_noisy':
        obj_fun = lambda x: calculate_SSR(df_empirical,
                                          params_DGP = params_DGP, 
                                          rho_perceived = params_DGP['rho'], 
                                          sigma_omega_perceived = x[0], 
                                          theta = 0.0)[objective]
        bounds = [(eps, np.inf)]
        x0 = 1.0
    if model == 'misperception':
        obj_fun = lambda x: calculate_SSR(df_empirical,
                                          params_DGP = params_DGP, 
                                          rho_perceived = x[0], 
                                          sigma_omega_perceived = eps, 
                                          theta = 0.0)[objective]
        bounds = [(eps, 1 - eps)]
        x0 = np.array([params_DGP['rho']])
    if model == 'noisy_misperception':
        obj_fun = lambda x: calculate_SSR(df_empirical,
                                          params_DGP = params_DGP, 
                                          rho_perceived = x[0], 
                                          sigma_omega_perceived = x[1], 
                                          theta = 0.0)[objective]
        bounds = ((eps, 1 - eps), 
                  (eps, np.inf))
        x0 = np.array([params_DGP['rho'], 1])
    if model == 'noisy_diagnostic':
        obj_fun = lambda x: calculate_SSR(df_empirical,
                                          params_DGP = params_DGP, 
                                          rho_perceived = params_DGP['rho'], 
                                          sigma_omega_perceived = x[1], 
                                          theta = x[0])[objective]
        bounds = ((eps, np.inf), 
                  (eps, np.inf))
        x0 = np.array([1, 1])
        
    if model == 'FIRE':
        G = 1.0
        SSR = calculate_SSR(df_empirical,
                            params_DGP = params_DGP, 
                            rho_perceived = params_DGP['rho'], 
                            sigma_omega_perceived = eps, 
                            theta = 0.0)
        
        return {'model': model,
                'theta': 0.0,
                'rho_perceived': params_DGP['rho'],
                'G': G,
                'SSR': SSR['SSR'],
                'SSR_agg': SSR['SSR_agg'],
                'SSR_idio': SSR['SSR_idio']}
    
    # Optimize
    optim_res = scipy.optimize.minimize(obj_fun, x0, bounds = bounds)
    
    # Collect parameter estimates
    if model == 'baseline_noisy':
        theta = 0.0
        rho_perceived = params_DGP['rho']
        sigma_omega_perceived = optim_res.x[0]
    if model == 'misperception':
        theta = 0.0
        rho_perceived = optim_res.x[0]
        sigma_omega_perceived = eps    
    if model == 'noisy_misperception':
        theta = 0.0
        rho_perceived = optim_res.x[0]
        sigma_omega_perceived = optim_res.x[1]    
    if model == 'noisy_diagnostic':
        theta = optim_res.x[0]
        rho_perceived = params_DGP['rho']
        sigma_omega_perceived = optim_res.x[1]    
    
    # Calculate output statistics
    SSR = calculate_SSR(df_empirical,
                        params_DGP = params_DGP, 
                        rho_perceived = rho_perceived, 
                        sigma_omega_perceived = sigma_omega_perceived, 
                        theta = theta)
    G = get_kalman_gain(rho = rho_perceived, 
                        sigma_eps = 1.0, 
                        sigma_omega = sigma_omega_perceived)
    
    # Return results
    return {'model': model,
            'theta': theta,
            'rho_perceived': rho_perceived,
            'G': G,
            'SSR': SSR['SSR'],
            'SSR_agg': SSR['SSR_agg'],
            'SSR_idio': SSR['SSR_idio']}


def get_boostrap_dataset(df_ind, block_length):
    """Construct a bootstrap dataset using the
    moving-blocks bootstrap
    
    Args:
        df_ind (df_ind): Individual-level dataset
        block_length (int): Block length
    
    Returns:
        dict: Contains:
            consensus: Consensus-level bootstrap dataset
            individual: Individual-level boostrap dataset
    """
    assert isinstance(block_length, int) and block_length > 0, \
        'block_length should be a positive integer.'

    df = df_ind.copy()

    # Create a time index for later resampling
    df['DATE'] = pd.to_datetime(df['DATE'].apply(lambda x: x.replace(':0', 'Q')))
    df = df.sort_values(['ID', 'DATE'], ascending = True)
    min_date = df['DATE'].min()
    df['T'] = (df['DATE'].dt.year * 4 + df['DATE'].dt.quarter - (min_date.year * 4 + min_date.quarter)) + 1

    # Calculate the number of blocks
    num_blocks = int(np.ceil(len(df['T'].unique()) / block_length))

    # Sample blocks with replacement by drawing a random
    # time index t, and then selecting all data with time
    # indices from t to (t + block_length - 1)

    # Calculate the highest such t
    # so that we can select block_length
    # observations. If T is the time-series dimension,
    # we are looking for index t* that satisfies
    # T = t* + block_length - 1 ==> t* = T - block_length + 1.
    max_index = df['T'].max() - block_length + 1

    # Draw random starting indices
    start_ind = np.random.choice(range(1, max_index + 1), num_blocks) 

    # Construct bootstrap dataset
    df_bootstrap = pd.DataFrame()
    for ii, tt in enumerate(start_ind):
        mask = (df['T'] >= tt) & (df['T'] <= tt + block_length - 1)
        df_temp = df.loc[mask, ].copy()
        df_temp['block'] = (ii + 1)
        df_bootstrap = df_bootstrap.append(df_temp) 

    # Create a time index for the bootstrap dataset
    df_bootstrap = df_bootstrap.sort_values(['ID', 'block'])
    df_bootstrap['T'] = np.tile(np.array(range(1, num_blocks * block_length + 1)), len(df['ID'].unique()))
    del df_bootstrap['block'], df_bootstrap['DATE']

    # Get consensus dataset
    var_list = ['SPFfor_Step{}'.format(ll) for ll in range(1, 5 + 1)]
    var_list.append('Realiz1')
    df_consensus = df_bootstrap.groupby('T').median()[var_list].reset_index()

    return {'individual': df_bootstrap,
            'consensus':  df_consensus}


def construct_leads_lags(df, K, var_names = None, group_ID = None):
    """Construct K leads and lags for
    variables in dataframe df
    
    Args:
        df (dataframe): Initial dataframe
        K (int): Number of lags and leads
        var_names (list, optional): Variables for which
            to calculate leads and lags
        group_ID (None, optional): Variable
            by which to group when calculating leads/lags
            (e.g., forecaster ID)
    
    Returns:
        dataframe: Dataframe with lags
            and leads
    """
    res = df.copy()
    if var_names is None:
        var_names = list(df.columns.values)
    if group_ID is not None:
        var_names = [var for var in var_names if var != group_ID]
    for var in var_names:
        for ll in range(1, K + 1):
            var_name_lag  = var + '_LAG{}'.format(ll)
            var_name_lead = var + '_LEAD{}'.format(ll)
            if group_ID is not None:
                res[var_name_lag] = res.loc[:, [var, group_ID]].groupby(group_ID).shift(ll).loc[:, var]
                res[var_name_lead] = res.loc[:, [var, group_ID]].groupby(group_ID).shift(-ll).loc[:, var]
            else:
                res[var_name_lag] = res[var].shift(ll)
                res[var_name_lead] = res[var].shift(-ll)
    return res


def estimate_IRF_local_projection(df, var_name, max_L, 
        max_K, fe_var = None):
    """Estimate the impulse response function
    of a single time series using local projections.
    The function estimates the following regression
    by ordinary least squares:

        x_{t + s} = b_1 x_{t - 1} + b_2 x_{t - 2} + ... + b_K x_{t - K} + u_{t + s}
    
        for each s = 0, 1, ..., (max_L - 1).
    
        Then, the impulse response function is
        approximated by the max_L estimated
        b_1 coefficients
    
    Args:
        df (df): Dataset
        var_name (str): Variable for which the impulse 
            response function is calcualted
        max_L (int): Number of impulse responses
        max_K (int): Number of lags to include when
            estimating local projections
        fe_var (str, optional): ID variable for fixed effects
    
    Returns:
        dict: Contains:
            IRF: Estimated impulse response coefficeint
            lag: Lag number
    """
    assert isinstance(max_K, int) and max_K >= 1, \
        'max_K must be an integer that is >= 1'
    assert isinstance(max_L, int) and max_L >= 1, \
        'max_L must be an integer that is >= 1'

    # Construct leads and lags
    df_temp = df.copy()
    df_temp = construct_leads_lags(df_temp, 
        np.max([max_K, max_L]), var_names = [var_name], 
        group_ID = fe_var)
    df_temp['{}_LEAD0'.format(var_name)] = df_temp[var_name]

    res = []
    for ss in range(0, max_L):
        
        # Construct the LHS of regression formula
        formula = '{var_name}_LEAD{ss} ~ 1'.format(var_name = var_name, 
                                                   ss = ss)
        
        # Construct the RHS of regression formula
        for ll in range(1, max_K + 1):
            formula += ' + {var_name}_LAG{ll}'.format(var_name = var_name,
                                                      ll = ll)
        if fe_var is not None:
            formula += '+ C({})'.format(fe_var)
        
        # Estimate the model and collect results
        mod = smf.ols(formula = formula, data = df_temp)
        mod_res = mod.fit()
        irf_coef = mod_res.params['{}_LAG1'.format(var_name)]
        res.append({'IRF': irf_coef,
                    'lag': ss + 1})
    
    return pd.DataFrame(res)


def fit_all_models(df_bias_coefs, params_DGP):
    """Wrapper function to fit all models
    (calibration exercise)
    
    Args:
        df_bias_coefs (df): Bias coefficients
        params_DGP (dict): DGP parameters
    
    Returns:
        df: Parameter estimates
    """
    res_baseline_noisy = fit_model(df_empirical = df_bias_coefs, 
                               params_DGP = params_DGP, 
                               model = 'baseline_noisy')
    res_misperception = fit_model(df_empirical = df_bias_coefs, 
                                  params_DGP = params_DGP, 
                                  model = 'misperception')
    res_noisy_misperception = fit_model(df_empirical = df_bias_coefs, 
                                        params_DGP = params_DGP, 
                                        model = 'noisy_misperception')
    res_noisy_diagnostic = fit_model(df_empirical = df_bias_coefs, 
                                     params_DGP = params_DGP, 
                                     model = 'noisy_diagnostic')
    res_FIRE = fit_model(df_empirical = df_bias_coefs, 
                         params_DGP = params_DGP, 
                         model = 'FIRE')
    res = [res_baseline_noisy,
           res_misperception,
           res_noisy_misperception,
           res_noisy_diagnostic,
           res_FIRE]
    return pd.DataFrame(res)

